from langchain_ollama import OllamaLLM
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough
import os, time, json

# Ollama server endpoint; overridable via the OLLAMA_HOST environment variable.
OLLAMA_HOST = os.getenv('OLLAMA_HOST', 'http://127.0.0.1:11434')

# Initialize the Ollama LLM client.
# NOTE: token streaming is driven by calling `chain.stream(...)` at the call
# site, not by a constructor flag — `OllamaLLM` has no `streaming` field, so
# the previous `streaming=True` kwarg was removed (unrecognized keys are
# rejected/ignored by the underlying Pydantic model).
ollama = OllamaLLM(
    base_url=OLLAMA_HOST,
    model="modelscope2ollama-registry.azurewebsites.net/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF",
)

# Prompt with a single human turn; {topic} is filled in at invocation time.
prompt = ChatPromptTemplate.from_messages([
    ("human", "请介绍一下{topic}")
])

# Minimal LCEL pipeline: the raw input string is routed into the prompt's
# {topic} slot, rendered, and forwarded to the Ollama model.
chain = {"topic": RunnablePassthrough()} | prompt | ollama

topic = "人工智能"
# Stream the generation token-by-token to stdout, flushing each piece so
# output appears immediately and without inserted newlines.
for token in chain.stream(topic):
    print(token, end="", flush=True)

    